! (C) Copyright 1995- ECMWF.
! (C) Copyright 1995- Meteo-France.
! 
! This software is licensed under the terms of the Apache Licence Version 2.0
! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
! In applying this licence, ECMWF does not waive the privileges and immunities
! granted to it by virtue of its status as an intergovernmental organisation
! nor does it submit to any jurisdiction.
!

MODULE SUTRLE_MOD
CONTAINS
SUBROUTINE SUTRLE(PNM,KGL,KLOOP)

!**** *sutrle * - transposition of Legendre polynomials during set-up

!     Purpose.
!     --------
!           transposition of Legendre polynomials during set-up

!**   Interface.
!     ----------
!        *call* *sutrle(pnm)

!        Explicit arguments :
!        --------------------

!        Implicit arguments :
!        --------------------

!     Method.
!     -------
!        See documentation

!     Externals.
!     ----------

!     Reference.
!     ----------
!        ECMWF Research Department documentation of the IFS

!     Author.
!     -------
!        MPP Group *ECMWF*

!     Modifications.
!     --------------
!        Original : 95-10-01
!        P.Towers : 10-01-12  Corrected over allocation of ZSNDBUF (XT4 fix)
!        G.Mozdzynski: March 2011 Support 2D (RW,RV) initialisation of legendre coeffs
!      F. Vana  05-Mar-2015  Support for single precision
!     ------------------------------------------------------------------


USE EC_PARKIND,      ONLY: JPRD, JPIM
USE MPL_MODULE,      ONLY: MPL_ALLREDUCE, MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, &
  &                        JP_NON_BLOCKING_STANDARD
USE TPM_DIM,         ONLY: R
USE TPM_DISTR,       ONLY: D, MTAGLETR, NPRCIDS, NPRTRW, NPRTRV, MYSETV, MYSETW, NPROC
USE TPM_FIELDS,      ONLY: F
USE SET2PE_MOD,      ONLY: SET2PE
USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS
!

IMPLICIT NONE

REAL(KIND=JPRD),INTENT(IN) :: PNM(:)
INTEGER(KIND=JPIM),INTENT(IN) :: KGL
INTEGER(KIND=JPIM),INTENT(IN) :: KLOOP

!     LOCAL

REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFV(:),ZRCVBUFV(:,:)
REAL(KIND=JPRD), ALLOCATABLE :: ZSNDBUFW(:,:),ZRCVBUFW(:,:)
INTEGER(KIND=JPIM) :: IM, IPOS, &
             & IRECVSET, IRECV, ISEND, ISENDSET, ITAG,ISENDSIZE, IRECVSIZE, &
             & J, JM, JMLOC, JN, JV, JROC ,IOFFT, IOFFG, IGL, ISREQ, IRREQ
INTEGER(KIND=JPIM) :: ISENDREQ(MAX(NPRTRW,NPRTRV))
INTEGER(KIND=JPIM) :: IRECVREQ(MAX(NPRTRW,NPRTRV))
INTEGER(KIND=JPIM) :: IGLVS(NPRTRV)
INTEGER(KIND=JPIM) :: IGLVR(NPRTRV)
INTEGER(KIND=JPIM) :: IPOSW(NPRTRW)

!     ------------------------------------------------------------------

!*       0.    Some initializations.
!              ---------------------

ITAG = MTAGLETR+KLOOP

! Perform barrier synchronisation to guarantee all processors have
! completed all previous communication

IF( NPROC > 1 .AND. KLOOP ==1)THEN
  CALL GSTATS(783,0)
  CALL MPL_BARRIER(CDSTRING='SUTRLE:')
  CALL GSTATS(783,1)
ENDIF

!
! First do communications in NPRTRV direction
!

!*     Calculate send buffer size

IF(KGL > 0) THEN
  ISENDSIZE = R%NSPOLEG+1
ELSE
  ISENDSIZE=1
ENDIF

ALLOCATE (ZSNDBUFV(ISENDSIZE))
ALLOCATE (ZRCVBUFV(R%NSPOLEG+1,NPRTRV))

!*   copy data to be sent into zsndbufv

ZSNDBUFV(1) = KGL
IF(KGL > 0) THEN
  CALL GSTATS(1141,0)
!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(J)
  DO J=1,R%NSPOLEG
    ZSNDBUFV(J+1) = PNM(J)
  ENDDO
!$OMP END PARALLEL DO
  CALL GSTATS(1141,1)
ENDIF

IRREQ=0
DO JROC=1,NPRTRV-1
  IRECV = MYSETV+JROC
  IF (IRECV > NPRTRV) IRECV = IRECV-NPRTRV
  IRECVSET = IRECV
  CALL SET2PE(IRECV,0,0,MYSETW,IRECVSET)
  IRREQ = IRREQ+1
  CALL GSTATS(801,0)
  CALL MPL_RECV(ZRCVBUFV(:,IRECVSET),KSOURCE=NPRCIDS(IRECV), &
   &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),&
   & KTAG=ITAG,CDSTRING='SUTRLE:')
  CALL GSTATS(801,1)
ENDDO

ISREQ = 0
DO JROC=1,NPRTRV-1
  ISEND = MYSETV-JROC
  IF (ISEND <= 0)     ISEND = ISEND+NPRTRV
  ISENDSET = ISEND
  CALL SET2PE(ISEND,0,0,MYSETW,ISENDSET)
  ISREQ = ISREQ+1
  CALL GSTATS(801,0)
  CALL MPL_SEND(ZSNDBUFV(1:ISENDSIZE),KDEST=NPRCIDS(ISEND), &
   &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),&
   & KTAG=ITAG,CDSTRING='SUTRLE:')
  CALL GSTATS(801,1)
ENDDO

IF(ISREQ > 0) THEN
  CALL GSTATS(801,0)
  CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), &
   & CDSTRING='SUTRLE: WAIT')
  CALL GSTATS(801,1)
ENDIF

IF(IRREQ > 0) THEN
  CALL GSTATS(801,0)
  CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), &
   & CDSTRING='SUTRLE: WAIT')
  CALL GSTATS(801,1)
ENDIF

!*   copy data from buffer to f%rpnm
CALL GSTATS(1141,0)
!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,IRECV,IRECVSET,IGL,JMLOC,IM,IOFFT,IOFFG,JN)
DO JROC=1,NPRTRV-1
  IRECV = MYSETV+JROC
  IF (IRECV > NPRTRV) IRECV = IRECV-NPRTRV
  IRECVSET = IRECV
  CALL SET2PE(IRECV,0,0,MYSETW,IRECVSET)
  IGL = ZRCVBUFV(1,IRECVSET)
  IGLVS(IRECVSET)=IGL
  IF( IGL > 0 )THEN
    DO JMLOC=1,D%NUMP
      IM = D%MYMS(JMLOC)
      IOFFT = D%NPMT(IM)
      IOFFG = D%NPMG(IM)
      DO JN=1,R%NTMAX-IM+2
        F%RPNM(IGL,IOFFT+JN) = ZRCVBUFV(1+IOFFG+JN,IRECVSET)
      ENDDO
    ENDDO
  ENDIF
ENDDO
!$OMP END PARALLEL DO

DEALLOCATE (ZSNDBUFV)

!*    copy data from pnm to rpnm

IGLVS(MYSETV)=KGL
IF(KGL > 0) THEN
  ZRCVBUFV(1,MYSETV)=KGL
  ZRCVBUFV(2:R%NSPOLEG+1,MYSETV)=PNM(1:R%NSPOLEG)
!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JMLOC,IM,IOFFT,IOFFG,JN)
  DO JMLOC=1,D%NUMP
    IM = D%MYMS(JMLOC)
    IOFFT = D%NPMT(IM)
    IOFFG = D%NPMG(IM)
    DO JN=1,R%NTMAX-IM+2
      F%RPNM(KGL,IOFFT+JN) = PNM(IOFFG+JN)
    ENDDO
  ENDDO
!$OMP END PARALLEL DO
ENDIF
CALL GSTATS(1141,1)


!
! Now do communications in the NPRTRW direction
!

!*     Calculate send buffer size

ISENDSIZE=0
DO JROC=1,NPRTRW-1
  ISEND = MYSETW-JROC
  IF (ISEND <= 0)     ISEND = ISEND+NPRTRW
  ISENDSET = ISEND
  CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV)
  IPOS = 0
  DO JM=0,R%NSMAX
    IF (ISENDSET == D%NPROCM(JM) ) IPOS = IPOS + R%NTMAX-JM+2 
  ENDDO
  ISENDSIZE = MAX(IPOS,ISENDSIZE)
ENDDO
ISENDSIZE=ISENDSIZE*NPRTRV+NPRTRV
IRECVSIZE=ISENDSIZE
IF( NPROC > 1 )THEN
  CALL GSTATS(801,0)
  CALL MPL_ALLREDUCE(IRECVSIZE,'MAX',CDSTRING='SUTRLE:')
  CALL GSTATS(801,1)
ENDIF

ALLOCATE (ZSNDBUFW(ISENDSIZE,NPRTRW))
ALLOCATE (ZRCVBUFW(IRECVSIZE,NPRTRW))

CALL GSTATS(1141,0)
!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,ISEND,ISENDSET,IPOS,JV,IGL,JM,JN)
DO JROC=1,NPRTRW-1
  ISEND = MYSETW-JROC
  IF (ISEND <= 0)     ISEND = ISEND+NPRTRW
  ISENDSET = ISEND
  CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV)
!*   copy data to be sent into zsndbufw
  IPOS=0
  DO JV=1,NPRTRV
    IPOS=IPOS+1
    ZSNDBUFW(IPOS,ISENDSET) = IGLVS(JV)
  ENDDO
  DO JV=1,NPRTRV
    IGL = IGLVS(JV)
    IF( IGL > 0 )THEN
      DO JM=0,R%NSMAX
        IF (ISENDSET == D%NPROCM(JM) ) THEN
          DO JN=1,R%NTMAX-JM+2
            IPOS = IPOS + 1
            ZSNDBUFW(IPOS,ISENDSET) = ZRCVBUFV(1+D%NPMG(JM)+JN,JV)
          ENDDO
        ENDIF
      ENDDO
    ENDIF
  ENDDO
  IPOSW(ISENDSET)=IPOS
ENDDO
!$OMP END PARALLEL DO
CALL GSTATS(1141,1)

IRREQ = 0
DO JROC=1,NPRTRW-1

  IRECV = MYSETW+JROC
  IF (IRECV > NPRTRW) IRECV = IRECV-NPRTRW
  IRECVSET = IRECV
  CALL SET2PE(IRECV,0,0,IRECVSET,MYSETV)
!*   receive message (if not empty)

  IRREQ = IRREQ+1
  CALL GSTATS(801,0)
  CALL MPL_RECV(ZRCVBUFW(:,IRECVSET),KSOURCE=NPRCIDS(IRECV), &
   & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IRECVREQ(IRREQ),&
   & KTAG=ITAG,CDSTRING='SUTRLE:')
  CALL GSTATS(801,1)
ENDDO

ISREQ = 0
DO JROC=1,NPRTRW-1
  ISEND = MYSETW-JROC
  IF (ISEND <= 0)     ISEND = ISEND+NPRTRW
  ISENDSET = ISEND
  CALL SET2PE(ISEND,0,0,ISENDSET,MYSETV)
  ISENDSIZE = IPOSW(ISENDSET)
  ISREQ = ISREQ+1
  CALL GSTATS(801,0)
  CALL MPL_SEND(ZSNDBUFW(1:ISENDSIZE,ISENDSET),KDEST=NPRCIDS(ISEND), &
   & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISENDREQ(ISREQ),&
   & KTAG=ITAG,CDSTRING='SUTRLE:')
  CALL GSTATS(801,1)
ENDDO


IF(ISREQ > 0) THEN
  CALL GSTATS(801,0)
  CALL MPL_WAIT(KREQUEST=ISENDREQ(1:ISREQ), &
   & CDSTRING='SUTRLE: WAIT')
  CALL GSTATS(801,1)
ENDIF

IF(IRREQ > 0) THEN
  CALL GSTATS(801,0)
  CALL MPL_WAIT(KREQUEST=IRECVREQ(1:IRREQ), &
   & CDSTRING='SUTRLE: WAIT')
  CALL GSTATS(801,1)
ENDIF

CALL GSTATS(1141,0)
!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JROC,IRECV,IRECVSET,IPOS,IGLVR,JV,IGL,JMLOC,IM,IOFFT,JN)
DO JROC=1,NPRTRW-1
  IRECV = MYSETW+JROC
  IF (IRECV > NPRTRW) IRECV = IRECV-NPRTRW
  IRECVSET = IRECV
  CALL SET2PE(IRECV,0,0,IRECVSET,MYSETV)
!*   copy data from buffer to f%rpnm
  IPOS=0
  DO JV=1,NPRTRV
    IPOS=IPOS+1
    IGLVR(JV)=ZRCVBUFW(IPOS,IRECVSET)
  ENDDO
  DO JV=1,NPRTRV
    IGL = IGLVR(JV)
    IF( IGL > 0 )THEN
      DO JMLOC=1,D%NUMP
        IM = D%MYMS(JMLOC)
        IOFFT = D%NPMT(IM)
        DO JN=1,R%NTMAX-IM+2
          IPOS = IPOS + 1
          F%RPNM(IGL,IOFFT+JN) = ZRCVBUFW(IPOS,IRECVSET)
        ENDDO
      ENDDO
    ENDIF
  ENDDO
ENDDO
!$OMP END PARALLEL DO
CALL GSTATS(1141,1)

DEALLOCATE (ZRCVBUFV)
DEALLOCATE (ZSNDBUFW)
DEALLOCATE (ZRCVBUFW)

IF( NPROC > 1 .AND. KLOOP ==1)THEN
  CALL GSTATS(783,0)
  CALL MPL_BARRIER(CDSTRING='SUTRLE:')
  CALL GSTATS(783,1)
ENDIF
END SUBROUTINE SUTRLE
END MODULE SUTRLE_MOD
